library(tidyverse)
library(pheatmap)
library(splots)
library(patchwork)
library(synergyfinder)
library(ggsignif)
library(broom)
library(ggrepel)
## read ctg data files
ctg_data <- lapply(list.files("~/tas/data/ctg_data/", full.names = T,
pattern = "TAS"), function(f) read_tsv(f, col_names = F,
col_types = "cci") %>% `colnames<-`(c("screen", "well", "pcount"))) %>%
bind_rows() %>% separate(screen, c("date", "operator", "mithras",
"experiment_id"), sep = "_", remove = FALSE) %>% separate(experiment_id,
c("experiment", "line", "TAS_concentration")) %>% # join compound annotation data
full_join(., lapply(list.files("~/tas/data/annotations/", full.names = T),
function(f) read_csv(f)) %>% bind_rows() %>% rename(well = destination.well) %>%
select(drug, well, Source_Position, Source) %>% separate(well,
c("row", "col"), sep = 1, remove = FALSE) %>% mutate(drug = if_else(is.na(drug),
Source, drug), drug = if_else(well == "J19", "DMSO", drug),
combination = if_else(col == "13", FALSE, TRUE))) %>% unite(id,
c("date", "line", "TAS_concentration", "experiment"), remove = FALSE) %>%
mutate(row_num = match(row, LETTERS[1:26]), col_num = as.numeric(col)) %>%
# annotate concentrations
mutate(concentration = ifelse(Source_Position == "Rack", NA,
ifelse(Source_Position == "Master_Library_Plate", 1, ifelse(Source_Position ==
"Dilution_Plate_1", 2, ifelse(Source_Position == "Dilution_Plate_2",
3, ifelse(Source_Position == "Dilution_Plate_3", 4, ifelse(Source_Position ==
"Dilution_Plate_4", 5, "no match")))))))
On the second screening day some plates were read-out twice. They are labeled TAS and TASa. I check if there are profound differences between these plates. First I correlate the pcount of these plates.
ctg_data %>% filter(date == "171212" & line == "19") %>% select(pcount,
well, id) %>% spread(id, pcount) %>% select(-well) %>% as.data.frame() %>%
cor() %>% pheatmap()
The plates are virtually the same. I also checked differences between zfactors, which are not apparent. I keep the plates with the experiment name “TAS” for simplicity.
ctg_data <- ctg_data %>% filter(experiment != "TASa")
Now I want to gain a broad overview of the asssay plates.
ctg_data %>% select(id, pcount) %>% split(., .$id) %>% lapply(.,
function(f) f$pcount %>% as.vector) %>% plotScreen(., 6)
Now I focus on column and row-wise effects. Row wise effects dominate clearly
ctg_data %>% ggplot(aes(row, pcount)) + geom_point() + geom_smooth() +
facet_wrap(~id) + theme_bw() + ggtitle("Spatial effects of rows") +
# next figure
ctg_data %>% ggplot(aes(col, pcount)) + geom_point() + geom_smooth() +
facet_wrap(~id) + theme_bw() + ggtitle("Spatial effects of columns")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
I see some spatial effects on most screening plates. The effect is mostly restricted to row-wise differences. I apply a conservative loss smoothing to correct for these.
## split data by plate and apply loess normalization
ctg_loess <- ctg_data %>% dplyr::select(row_num, col_num, pcount,
id) %>% # na values
drop_na() %>% # set the ctrl column 13 to NA mutate(pcount = ifelse(col_num
# == 13, NA, pcount)) %>%
split(.$id) %>% lapply(function(s) {
## loess fit. family is 'symmetric' to be robust to outliers
fit <- loess(pcount ~ row_num + col_num, data = s, family = "symmetric")
## apply normalization
tibble(norm_fac = fit$fitted) %>% cbind(s %>% drop_na(),
.) %>% mutate(pcount_norm = pcount - (norm_fac - median(norm_fac)))
}) %>% bind_rows() %>% full_join(ctg_data)
Now I plot the results.
ctg_loess %>% ggplot(aes(row, pcount_norm)) + geom_point() +
geom_smooth() + facet_wrap(~id) + theme_bw() + ggtitle("Spatial effects of rows") +
# next figure
ctg_loess %>% ggplot(aes(col, pcount_norm)) + geom_point() +
geom_smooth() + facet_wrap(~id) + theme_bw() + ggtitle("Spatial effects of columns")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
For demo purposes I show one plate before and after correction:
ctg_data %>% filter(TAS_concentration == "1") %>% ggplot(aes(row,
pcount)) + geom_point() + # geom_smooth() +
facet_wrap(~line) + theme_bw() + ggtitle("Before Normalization: Spatial effects of rows") +
# and after normalization
ctg_loess %>% filter(TAS_concentration == "1") %>% ggplot(aes(row,
pcount_norm)) + geom_point() + geom_smooth() + facet_wrap(~line) +
theme_bw() + ggtitle("After Normalization:Spatial effects of rows")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Now I look at the distribution of controls in the first screening replicate.
coi <- c("Stauro_500nM", "DMSO")
ctg_loess %>% mutate(line_date_exp = paste0(line, "_", date,
"_", experiment)) %>% filter(drug %in% coi) %>% ggplot(aes(drug,
pcount_norm, color = combination)) + geom_jitter(alpha = 0.7) +
facet_grid(line_date_exp ~ TAS_concentration) + theme_bw()
The distribution seems to be quiet good. I now go on calculating the z-factors for each plate for combined and uncombined wells.
ctg_loess %>% filter(drug %in% coi) %>% group_by(id, drug, combination,
TAS_concentration, line, date, experiment) %>% summarise(sd = sd(pcount_norm,
na.rm = TRUE), mean = mean(pcount_norm, na.rm = TRUE)) %>%
group_by(id, combination, TAS_concentration, line, date,
experiment) %>% summarise(zfactor = 1 - ((3 * sum(sd))/abs(range(mean)[1] -
range(mean)[2]))) %>% mutate(qc = if_else(zfactor < 0.25,
FALSE, TRUE)) %>% ggplot(aes(combination, zfactor, color = qc,
shape = date)) + geom_point(alpha = 0.7, size = 5) + facet_grid(line ~
TAS_concentration) + theme_bw() + geom_hline(yintercept = 0.25)
Also the z-factors are okay for most plates. Three plates have slight losses in assay quality, however, this seems still acceptable.
Again, I take a look at the TAS-untreated ctrls. They should be the same across plates
coi <- c("DMSO", "Stauro_500nM")
ctg_loess %>% filter(drug %in% coi & combination == FALSE) %>%
ggplot(aes(TAS_concentration, pcount_norm)) + geom_boxplot(aes(colour = drug),
width = 6) + theme_bw() + facet_grid(line ~ date) + # scale_y_continuous(limits = c(-1,250000)) +
ggtitle("Before DMSO normalization") + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
There are some line specific differences in viability and one outlier plate 171128_27_4. I now normalize the plates to the untreated DMSO control.
ctg_ln <- ctg_loess %>% full_join(., ctg_loess %>% filter(drug ==
"DMSO" & combination == FALSE) %>% group_by(id) %>% summarise(ctrl = median(pcount_norm,
na.rm = TRUE)) %>% ungroup()) %>% group_by(id) %>% mutate(rv = pcount_norm/ctrl) %>%
ungroup()
## Joining, by = "id"
Now I look at the controls after normalization.
coi <- c("DMSO", "Stauro_500nM")
ctg_ln %>% filter(drug %in% coi & combination == FALSE) %>% ggplot(aes(TAS_concentration,
rv)) + geom_boxplot(aes(colour = drug), width = 6) + theme_bw() +
facet_grid(line ~ date) + # scale_y_continuous(limits = c(-1,250000)) +
ggtitle("After DMSO normalization") + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
The normalization worked well. As intended there are controlled line and TAS dependent effects left.
ctg_ln %>% ggplot(aes(TAS_concentration, rv)) + geom_boxplot(width = 6) +
theme_bw() + facet_grid(line ~ date) + # scale_y_continuous(limits = c(-1,250000)) +
ggtitle("Relative viability measurments for every plate") + theme(axis.text.x = element_text(angle = 90,
hjust = 1))
I wonder if I have to center the dataset further inbetween 1 and 0. Therefore I plot a histogram of all viabilities.
ctg_ln %>% ggplot(aes(rv)) + geom_histogram(bins = 90) + theme_classic() +
geom_vline(xintercept = c(0, 1)) + xlab("relative viability")
As an alternative approach I trim the data between 1 and 0, assuming that these datapoints are only spread and normalization artifacts.
ctg_lnt <- ctg_ln %>% mutate(rv_trim = ifelse(rv < 0, 0, rv),
rv_trim = ifelse(rv > 1, 1, rv_trim))
ctg_lnt %>% ggplot(aes(rv_trim)) + geom_histogram(bins = 90) +
theme_classic() + geom_vline(xintercept = c(0, 1)) + xlab("relative viability")
I am fine with this distribution. Let’s move on. Before we continue I take a look at the overall correlation.
coi <- c("DMSO", "Stauro_500nM")
tmp <- ctg_ln %>% unite("line_well", c("line", "well", "TAS_concentration")) %>%
ungroup() %>% mutate(Replicate = ifelse(date == "171128",
1, 2)) %>% dplyr::select(line_well, Replicate, rv, drug) %>%
spread(Replicate, rv) %>% mutate(ctrl = if_else(drug %in%
coi, TRUE, FALSE))
tmp.cor <- tmp %>% dplyr::select(`1`, `2`) %>% cor(., use = "pairwise.complete.obs") %>%
.[1, 2] %>% round(2)
tmp %>% ggplot(aes(x = `1`, y = `2`)) + geom_point(alpha = 0.3) +
theme_classic() + ylab("Replicate 2") + xlab("Replicate 1") +
ggtitle(paste0("correlation of normalized viabiliy, r =",
tmp.cor)) + geom_abline(slope = 1) + # geom_density2d(alpha = 0.7, color = 'black') +
scale_x_continuous(breaks = c(0, 0.5, 1)) + scale_y_continuous(breaks = c(0,
0.5, 1)) + geom_hline(yintercept = 1) + geom_vline(xintercept = 1)
# stat_binhex()
I consider using the “synergyfinder” package, published by He et al. at the FIMM. First I have to wrangle the data into a standardized input format. For this I write an ugly function.
prepare_synergy <- function(df, controls){
#not a universal function
syn_drug <- df %>%
filter(!drug %in% controls & combination == TRUE) %>%
#setup the easy stuff
mutate(BlockID = paste("TAS-102", drug, sep = "_"),
Response = rv*100,
Replicate = ifelse(date == "171128", 1, 2),
DrugRow = "TAS-102",
DrugCol = drug,
#build matrix
Col = ifelse(drug != "DMSO", as.numeric(concentration), 6),
Row = ifelse(TAS_concentration != "0", as.numeric(TAS_concentration), 6),
#define concentrations
ConcRow = ifelse(TAS_concentration != "0", 25/(5^(as.numeric(TAS_concentration)-1)), 0), #assuming the initila concentration is 50
ConcCol = 10/(5^(as.numeric(concentration)-1)),
ConcUnit = "uM"
)
syn_ctrl <- full_join(syn_drug %>% select(BlockID, line, TAS_concentration, Replicate) %>% distinct(),
#add TAS DMSO controls
df %>%
filter(drug == "DMSO" & combination == TRUE) %>%
mutate(Replicate = ifelse(date == "171128", 1, 2)) %>%
group_by(line, TAS_concentration, Replicate) %>%
mutate(Response = median(rv, na.rm = TRUE)*100) %>%
mutate(#define concentrations
ConcRow = ifelse(TAS_concentration != "0", 50/(5^(as.numeric(TAS_concentration)-1)), 0), #assuming the initila concentration is 50
ConcCol = 0,
Col = 6,
Row = ifelse(TAS_concentration != "0", as.numeric(TAS_concentration), 6),
DrugRow = "TAS-102",
ConcUnit = "uM"
)) %>%
#hacky way of formatting the data
mutate(DrugCol = substr(BlockID, 9, nchar(BlockID)))
syn_mat <- full_join(syn_drug, syn_ctrl) %>% dplyr::select(BlockID, Col, Row, Response, Replicate, DrugRow, DrugCol, ConcRow, ConcCol, ConcUnit, line) %>% distinct()
return(syn_mat)
}
Now I apply this function.
coi <- c("DMSO", "Stauro_500nM")
syn_mat <- prepare_synergy(ctg_lnt, coi)
## Joining, by = c("line", "TAS_concentration", "Replicate")
## Joining, by = c("row_num", "col_num", "pcount", "id", "norm_fac", "pcount_norm", "screen", "date", "operator", "mithras", "experiment", "line", "TAS_concentration", "well", "drug", "row", "col", "Source_Position", "Source", "combination", "concentration", "ctrl", "rv", "rv_trim", "BlockID", "Response", "Replicate", "DrugRow", "DrugCol", "Col", "Row", "ConcRow", "ConcCol", "ConcUnit")
# I also determine the synergy for the trimmed dataset
syn_mat_t <- prepare_synergy(ctg_lnt %>% dplyr::select(-rv) %>%
rename(rv = rv_trim), coi)
## Joining, by = c("line", "TAS_concentration", "Replicate")
## Joining, by = c("row_num", "col_num", "pcount", "id", "norm_fac", "pcount_norm", "screen", "date", "operator", "mithras", "experiment", "line", "TAS_concentration", "well", "drug", "row", "col", "Source_Position", "Source", "combination", "concentration", "ctrl", "rv", "BlockID", "Response", "Replicate", "DrugRow", "DrugCol", "Col", "Row", "ConcRow", "ConcCol", "ConcUnit")
Now I define a wrapper function that can apply multiple synergy calculations and report the results in a tidy format. For now I stick to calulating the average synergy scores.
tidy_synergy = function(df, me){ lapply(as.list(me), function(m, f = df){
#hacky way of ignoring cases in which the synergy scores could not be calculated
return(tryCatch(f %>%
ReshapeData(., data.type = "viability") %>%
#PlotDoseResponse() %>%
CalculateSynergy(method = m, correction = TRUE) %>%
tibble(BlockID = .$drug.pairs %>% simplify() %>% .[4],
method = .$method,
score_average = .$scores %>% .[[1]] %>% .[-1,-1] %>% simplify() %>% mean(),
score_sd = .$scores %>% .[[1]] %>% .[-1,-1] %>% simplify() %>% sd()) %>%
select(BlockID, method, score_average, score_sd) %>% distinct(),
error=function(e) NULL))}
) %>% bind_rows()
#PlotSynergy(type = "all")
}
I apply the function on the dataset.
tas_syn <- syn_mat %>% group_by(line, Replicate, BlockID) %>%
do(tidy_synergy(df = ., me = c("ZIP", "HSA", "Bliss", "Loewe"))) %>%
ungroup() %>% # complete the df for scores that could not be calculated
mutate(method = as.factor(method), line = as.factor(line)) %>%
complete(BlockID, method, Replicate, line) %>% mutate(score_average = ifelse(is.nan(score_average),
NA, score_average))
As an alternative I calculate synergy scores only for selected query concentrations. I start out by skipping the highest tested (TAS) concentration.
syn_mat_low <- syn_mat %>% filter(Row != 1) %>% mutate(Row = Row -
1)
# syn_mat_low <- syn_mat %>% filter(Row != 1 & Col != 1) %>%
# mutate(Row = Row -1, Col = Col -1)
tas_syn_l <- syn_mat_low %>% group_by(line, Replicate, BlockID) %>%
do(tidy_synergy(df = ., me = c("ZIP", "HSA", "Bliss", "Loewe"))) %>%
ungroup() %>% # complete the df for scores that could not be calculated
mutate(method = as.factor(method), line = as.factor(line)) %>%
complete(BlockID, method, Replicate, line) %>% mutate(score_average = ifelse(is.nan(score_average),
NA, score_average))
First I wonder how many models were fitted in both approaches.
df <- tibble(complete = tas_syn$score_average, wo_highest = tas_syn_l$score_average)
tibble(complete_na = df$complete %>% is.na() %>% sum(), wo_highest_na = df$wo_highest %>%
is.na() %>% sum(), frac_comp = complete_na/nrow(df), frac_wo_hi = wo_highest_na/nrow(df))
Now I am interested in the correltaion between synergy scores
tibble(complete = tas_syn$score_average, wo_highest = tas_syn_l$score_average) %>%
ggplot(aes(complete, wo_highest)) + geom_point() + theme_classic()
## Warning: Removed 301 rows containing missing values (geom_point).
tibble(complete = tas_syn$score_average, wo_highest = tas_syn_l$score_average) %>%
corrr::correlate()
For now I add the synergy data which was calculated without the highest concentration to the dataset.
tas_syn <- full_join(tas_syn, tas_syn_l %>% rename(score_average_l = score_average,
score_sd_l = score_sd))
## Joining, by = c("BlockID", "method", "Replicate", "line")
Now I want to inspect the distribution of synergy scores.
tas_syn %>% ggplot(aes(score_average)) + geom_histogram() + facet_grid(line ~
method)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 167 rows containing non-finite values (stat_bin).
For now, I will stick to to ZIP method which calculates a delta-score. In recent publications this score is >15 for strong interactions and <-10 for antagonistic combinations. I observed that on multiple occasions not all statistics could be calculated. I want to see how often theses errors happen.
# tas_syn %>% group_by(BlockID, method) %>% summarise(n =
# sum(is.na(score_average))) %>% filter(n != 0) %>%
# ggplot(aes(n)) + geom_histogram() + facet_grid(~ method)
# define list
fb <- tas_syn %>% group_by(BlockID, method) %>% summarise(n = sum(is.na(score_average))) %>%
filter(n > 4) %>% .$BlockID
Errors during calculation happen in about 10% of all drug combinations, probably due to an inability to a model for the data. I define a list of combinations which will be excluded for now due to too sparse data.
tas_test <- lapply(as.list(tas_syn %>% filter(!BlockID %in% fb) %>%
.$BlockID %>% unique()), function(boi) tas_syn %>% filter(!BlockID %in%
fb) %>% mutate(test = if_else(BlockID == boi, TRUE, FALSE),
f_method = method) %>% group_by(f_method) %>% do(tidy(t.test(score_average ~
test, data = .))) %>% mutate(BlockID = boi)) %>% bind_rows() %>%
group_by(f_method) %>% mutate(p.adj = p.value %>% p.adjust(method = "BH"))
tas_test %>% ggplot(aes(p.adj)) + geom_histogram() + facet_wrap(~f_method) +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Now I am interested in compounds which are significantly different from the rest of all tested combinations.
tas_test %>% group_by(BlockID) %>% mutate(avg_p = mean(p.adj,
na.rm = TRUE)) %>% arrange(avg_p) %>% ungroup() %>% mutate(BlockID = factor(BlockID,
levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, p.adj)) +
geom_point() + theme_minimal() + scale_y_log10() + facet_wrap(~f_method) +
geom_hline(yintercept = 0.05) + geom_label_repel(data = tas_test %>%
filter(p.adj < 0.05), aes(label = BlockID)) + theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
Next to one combination with strong synergism I also look for other potent combinations. In the following plot I show the synergy scores for all categories and in red the synergy score based on the drop-out data.
tas_syn %>% group_by(BlockID, method) %>% mutate(avg_s = mean(score_average,
na.rm = TRUE), avg_sl = mean(score_average_l, na.rm = TRUE)) %>%
group_by(BlockID) %>% mutate(avg_a = mean(score_average,
na.rm = TRUE)) %>% arrange(avg_a) %>% ungroup() %>% mutate(BlockID = factor(BlockID,
levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, avg_s)) +
geom_point(color = "black") + geom_point(aes(BlockID, avg_sl),
color = "red") + theme_classic() + geom_hline(yintercept = 0) +
# scale_y_log10() +
facet_wrap(~method) + # geom_hline(yintercept = 0.05) +
geom_label_repel(data = tas_syn %>% group_by(BlockID, method) %>%
summarise(avg_s = mean(score_average, na.rm = TRUE)) %>%
group_by(method) %>% do(arrange(., desc(avg_s)) %>% head(3)),
aes(label = BlockID), hjust = -0.1, vjust = -0.1) + ylab("Average Synergy Score") +
xlab("Drug Combinations") + theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
## Warning: Ignoring unknown parameters: hjust, vjust
Now I invert the color scheme and arrange compounds according to the drop-out synergies. In blue I show the “complete” synergy scores.
tas_syn %>% group_by(BlockID, method) %>% mutate(avg_s = mean(score_average,
na.rm = TRUE), avg_sl = mean(score_average_l, na.rm = TRUE)) %>%
group_by(BlockID) %>% mutate(avg_al = mean(score_average_l,
na.rm = TRUE)) %>% arrange(avg_al) %>% ungroup() %>% mutate(BlockID = factor(BlockID,
levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, avg_sl)) +
geom_point(color = "black") + geom_point(aes(BlockID, avg_s),
color = "blue") + theme_classic() + geom_hline(yintercept = 0) +
# scale_y_log10() +
facet_wrap(~method) + # geom_hline(yintercept = 0.05) +
geom_label_repel(data = tas_syn %>% group_by(BlockID, method) %>%
summarise(avg_sl = mean(score_average_l, na.rm = TRUE)) %>%
group_by(method) %>% do(arrange(., desc(avg_sl)) %>% head(3)),
aes(label = BlockID), hjust = -0.1, vjust = -0.1) + ylab("Average Synergy Score") +
xlab("Drug Combinations") + theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
## Warning: Ignoring unknown parameters: hjust, vjust
The Bliss score is an established measure of drug synergy. Therefore I plot it in more detail. First I plot only the complete averages across all lines and replicates.
tas_syn %>% filter(method == "Bliss") %>% group_by(BlockID, method) %>%
mutate(avg_s = mean(score_average, na.rm = TRUE)) %>% group_by(BlockID) %>%
mutate(avg_a = mean(score_average, na.rm = TRUE)) %>% arrange(avg_a) %>%
ungroup() %>% mutate(BlockID = factor(BlockID, levels = BlockID %>%
unique())) %>% ggplot(aes(BlockID, avg_s)) + geom_point() +
theme_classic() + geom_hline(yintercept = 0) + # scale_y_log10() + geom_hline(yintercept = 0.05) +
geom_label_repel(data = tas_syn %>% filter(method == "Bliss") %>%
group_by(BlockID, method) %>% summarise(avg_s = mean(score_average,
na.rm = TRUE)) %>% group_by(method) %>% do(arrange(., desc(avg_s)) %>%
head(5)), aes(label = BlockID), hjust = -0.1, vjust = -0.1) +
ylab("Bliss Synergy Score") + xlab("Drug Combinations") +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
## Warning: Ignoring unknown parameters: hjust, vjust
To see if there are line specific differences, I plot the data in a boxplot format. The first plot shows the complete synergy dataset.
tas_syn %>% filter(method == "ZIP") %>% group_by(BlockID, method) %>%
mutate(avg_s = mean(score_average, na.rm = TRUE), avg_sl = mean(score_average_l,
na.rm = TRUE)) %>% group_by(BlockID) %>% mutate(avg_a = mean(score_average,
na.rm = TRUE)) %>% arrange(avg_a) %>% ungroup() %>% mutate(BlockID = factor(BlockID,
levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, score_average)) +
geom_boxplot() + geom_point(aes(color = line)) + theme_classic() +
geom_hline(yintercept = 0) + # scale_y_log10() + geom_hline(yintercept = 0.05) +
ylab("ZIP Synergy Score") + xlab("Drug Combinations") + theme(axis.text.x = element_text(angle = 90,
hjust = 1), axis.ticks.x = element_blank())
## Warning: Removed 39 rows containing non-finite values (stat_boxplot).
## Warning: Removed 39 rows containing missing values (geom_point).
Now I take a look at the “dropout” dataset in a similiar way.
tas_syn %>% filter(method == "ZIP") %>% group_by(BlockID, method) %>%
mutate(avg_s = mean(score_average, na.rm = TRUE), avg_sl = mean(score_average_l,
na.rm = TRUE)) %>% group_by(BlockID) %>% mutate(avg_al = mean(score_average_l,
na.rm = TRUE)) %>% arrange(avg_al) %>% ungroup() %>% mutate(BlockID = factor(BlockID,
levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, score_average_l)) +
geom_boxplot() + geom_point(aes(color = line)) + theme_classic() +
geom_hline(yintercept = 0) + # scale_y_log10() + geom_hline(yintercept = 0.05) +
ylab("ZIP Synergy Score") + xlab("Drug Combinations") + theme(axis.text.x = element_text(angle = 90,
hjust = 1), axis.ticks.x = element_blank())
## Warning: Removed 44 rows containing non-finite values (stat_boxplot).
## Warning: Removed 44 rows containing missing values (geom_point).
tas_syn %>% unite(exp, c(method, Replicate, line)) %>% dplyr::select(exp,
BlockID, score_average, BlockID) %>% as.data.frame() %>%
spread(exp, score_average) %>% remove_rownames() %>% column_to_rownames(.,
var = "BlockID") %>% drop_na() %>% pheatmap()
Now I take a closer look at some drug combinations
boi = c("TAS-102_MK-8776/ SCH900776")
moi = "ZIP"
tas_syn %>% mutate(boi = if_else(BlockID %in% boi, TRUE, FALSE)) %>%
filter(method == moi) %>% ggplot(aes(as.factor(boi), score_average,
group = boi)) + geom_jitter(aes(color = line), width = 0.3) +
stat_summary(fun.y = "mean", fun.ymin = "mean", fun.ymax = "mean",
colour = "black", geom = "crossbar", width = 0.5) + geom_signif(comparisons = list(c("TRUE",
"FALSE")), method = "t.test") + theme_classic() + ggtitle(paste0(boi,
" method:", moi))
## Warning: Ignoring unknown parameters: method
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing non-finite values (stat_signif).
## Warning: Removed 39 rows containing missing values (geom_point).
syn_mat %>% filter(line == "07", BlockID == boi, Replicate ==
1) %>% ReshapeData(., data.type = "viability") %>% CalculateSynergy(method = "ZIP",
correction = TRUE) %>% PlotSynergy(type = "all", save.file = FALSE)
boi = c("TAS-102_Birinapant")
moi = "ZIP"
tas_syn %>% mutate(boi = if_else(BlockID %in% boi, TRUE, FALSE)) %>%
filter(method == moi) %>% ggplot(aes(as.factor(boi), score_average,
group = boi)) + geom_jitter(aes(color = line), width = 0.3) +
stat_summary(fun.y = "mean", fun.ymin = "mean", fun.ymax = "mean",
colour = "black", geom = "crossbar", width = 0.5) + geom_signif(comparisons = list(c("TRUE",
"FALSE")), method = "t.test") + theme_classic() + ggtitle(paste0(boi,
" method:", moi))
## Warning: Ignoring unknown parameters: method
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing non-finite values (stat_signif).
## Warning: Removed 39 rows containing missing values (geom_point).
syn_mat %>% filter(line == "07", BlockID == boi, Replicate ==
1) %>% ReshapeData(., data.type = "viability") %>% CalculateSynergy(method = "ZIP",
correction = TRUE) %>% PlotSynergy(type = "all", save.file = FALSE)
boi = c("TAS-102_AZD 5363")
moi = "ZIP"
tas_syn %>% mutate(boi = if_else(BlockID %in% boi, TRUE, FALSE)) %>%
filter(method == moi) %>% ggplot(aes(as.factor(boi), score_average,
group = boi)) + geom_jitter(aes(color = line), width = 0.3) +
stat_summary(fun.y = "mean", fun.ymin = "mean", fun.ymax = "mean",
colour = "black", geom = "crossbar", width = 0.5) + geom_signif(comparisons = list(c("TRUE",
"FALSE")), method = "t.test") + theme_classic() + ggtitle(paste0(boi,
" method:", moi))
## Warning: Ignoring unknown parameters: method
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing non-finite values (stat_signif).
## Warning: Removed 39 rows containing missing values (geom_point).
syn_mat %>% filter(line == "07", BlockID == boi, Replicate ==
1) %>% ReshapeData(., data.type = "viability") %>% CalculateSynergy(method = "ZIP",
correction = TRUE) %>% PlotSynergy(type = "all", save.file = FALSE)
boi = c("TAS-102_Bosutinib")
moi = "ZIP"
tas_syn %>% mutate(boi = if_else(BlockID %in% boi, TRUE, FALSE)) %>%
filter(method == moi) %>% ggplot(aes(as.factor(boi), score_average,
group = boi)) + geom_jitter(aes(color = line), width = 0.3) +
stat_summary(fun.y = "mean", fun.ymin = "mean", fun.ymax = "mean",
colour = "black", geom = "crossbar", width = 0.5) + geom_signif(comparisons = list(c("TRUE",
"FALSE")), method = "t.test") + theme_classic() + ggtitle(paste0(boi,
" method:", moi))
## Warning: Ignoring unknown parameters: method
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing non-finite values (stat_signif).
## Warning: Removed 39 rows containing missing values (geom_point).
syn_mat %>% filter(line == "07", BlockID == boi, Replicate ==
1) %>% ReshapeData(., data.type = "viability") %>% CalculateSynergy(method = "ZIP",
correction = TRUE) %>% PlotSynergy(type = "all", save.file = FALSE)
Add library data and look for enrichments.